* Session setup: pin the Stata version, start from an empty session, fix the seed
* and graph scheme, build a provenance tag, and open a text log named after this file.
version 18.0               // version control
set processors 8           // to ensure replicability across different numbers of cores
clear all                  // clear existing data
macro drop _all            // and macros, clean slate
set seed 20220909          // set seed
set scheme cleanplots      // clean plots (NOTE(review): presumably a user-installed scheme -- confirm it is on the adopath)

local pgm  "dst-Figure_3_binscatter_observables"                     // file name
local who  "Muzhe Yang"                                              // author
local dte  "2025-01-20"                                              // created date
local dte2 "`c(current_date)'"                                       // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

capture log close          // close a log left open by an earlier run; -capture- ignores the error if none is open
* Forward slashes are used as the directory separator: a backslash directly in front
* of a macro reference is an escape character in Stata, and "/" works on every OS.
log using "code/analysis/figures/`pgm'.txt", text replace
display "`tag'"

**# data prep ------------------------------------------------------------------------------------

* Load the estimation sample and keep wave 2 only.
* The path uses forward slashes so the same do-file runs on Windows, macOS and Unix.
use "data_clean/dst-data04_for_estimation_within_250_miles", clear
keep if wave == 2

*## (1) create the 9 cells, following page 215 of the following paper:
*## Giuntella, O. and F. Mazzonna (2019). "Sunset Time and the Economic Effects of Social Jetlag: Evidence from US Time Zone Borders." Journal of Health Economics 65: 210-226.

* Both classifiers must be fully observed before any cell is assigned.
assert !missing(centroid_lat)
assert !missing(region)

* Cells are numbered border by border (offsets 0, 3, 6); within each border the
* latitude bands are: > 40 (first), (34, 40] (second), <= 34 (third).
* Observations whose region matches none of the three borders keep a missing cell.
gen cell = .
local offset = 0
foreach border in "eastern and central" "central and mountain" "mountain and pacific" {
    replace cell = `offset' + 1 if region == "`border'" & centroid_lat > 40
    replace cell = `offset' + 2 if region == "`border'" & centroid_lat > 34 & centroid_lat <= 40
    replace cell = `offset' + 3 if region == "`border'" & centroid_lat <= 34
    local offset = `offset' + 3
}

* Diagnostics: cell counts by time zone, then the latitude range of each band.
tab cell time_zone, missing
summ centroid_lat if inlist(cell, 1, 4, 7)
summ centroid_lat if inlist(cell, 2, 5, 8)
summ centroid_lat if inlist(cell, 3, 6, 9)

*## (2) observables

* The covariate list is assembled in groups; the resulting global is one
* space-separated list of 15 variable names.
global x "pop white_pct black_pct hispanic_pct"
global x "$x pop_under_18yr_pct pop_65yr_over_pct age_median"
global x "$x educ_hs_pct educ_coll_pct married_pct hh_size"
global x "$x hh_income_median home_value_median no_health_ins_pct unemploy_pct"
describe $x

**# estimation prep -----------------------------------------------------------------------

local radius = 50                                  // maximum distance to the border kept in the sample

encode CountyFIPS, generate(county_fips)           // numeric county identifier built from the string FIPS code
egen nomiss = rowmiss($x dist_to_border centroid_lat daylight_dur_max)   // number of missing values across these variables
assert !missing(StateAbbr)

* Sample flag: complete cases, outside Arizona, within `radius' of the border.
gen sample_selection = 0
replace sample_selection = 1 if nomiss == 0 & StateAbbr != "AZ" & dist_to_border <= `radius'

**# figure ----------------------------------------------------------------------------------------------

* One panel per observable. For each variable:
*   1. rdplot bins the variable on either side of the cutoff (0) of dist_to_border_signed;
*      -hide- suppresses rdplot's own graph and -genvars- leaves the results as rdplot_* variables;
*   2. twoway redraws rdplot_mean_y and the rdplot_ci_l / rdplot_ci_r bounds with custom styling
*      and stores the graph in memory under the variable's name;
*   3. the rdplot_* variables are dropped so the next rdplot call can recreate them.
* Everything that is identical across panels lives in the three macros below, so a
* styling change is made once instead of fifteen times.

* Half-width of the x axis follows the sample radius; fall back to 50 if the local is not defined.
if "`radius'" == "" local radius = 50

* rdplot settings shared by all panels
local rd_opts "c(0) nbins(20 20) p(0) kernel(uni) ci(95) hide genvars"

* plot layers shared by all panels: bin means as markers, CI bounds as lines
* NOTE(review): each CI line is a single connected series, so it is also drawn across the cutoff -- confirm this is intended.
local layers `"(scatter rdplot_mean_y rdplot_mean_x, sort msymbol(smcircle) mcolor(gray))"'
local layers `"`layers' (line rdplot_ci_l rdplot_mean_x, sort lcolor(midblue) lpattern(solid))"'
local layers `"`layers' (line rdplot_ci_r rdplot_mean_x, sort lcolor(midblue) lpattern(solid))"'

* axis and legend options shared by all panels
local axes `"xline(0, lpattern(dash) lwidth(thin) lcolor(black)) xlabel(-`radius'(10)`radius')"'
local axes `"`axes' xtitle("distance < 0 for the control group; distance > 0 for the treatment group", size(*0.8))"'
local axes `"`axes' legend(off)"'

rdplot pop dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("Population (in 1,000)", size(*0.9) span) name(pop, replace)
drop rdplot_*

rdplot white_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population that is white", size(*0.9) span) name(white_pct, replace)
drop rdplot_*

rdplot black_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population that is black", size(*0.9) span) name(black_pct, replace)
drop rdplot_*

rdplot hispanic_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population that is Hispanic or Latino", size(*0.9) span) name(hispanic_pct, replace)
drop rdplot_*

rdplot pop_under_18yr_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population less than 18 years", size(*0.9) span) name(pop_under_18yr_pct, replace)
drop rdplot_*

rdplot pop_65yr_over_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population 65 years or older", size(*0.9) span) name(pop_65yr_over_pct, replace)
drop rdplot_*

rdplot age_median dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("Median age", size(*0.9) span) name(age_median, replace)
drop rdplot_*

rdplot educ_hs_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population aged 25+ with high school degree only", size(*0.9) span) name(educ_hs_pct, replace)
drop rdplot_*

rdplot educ_coll_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population aged 25+ with bachelor's degree or higher", size(*0.9) span) name(educ_coll_pct, replace)
drop rdplot_*

rdplot married_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population being married", size(*0.9) span) name(married_pct, replace)
drop rdplot_*

rdplot hh_size dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("Average household size", size(*0.9) span) name(hh_size, replace)
drop rdplot_*

* In the next two titles the dollar sign is written as \$ : inside double quotes an
* unescaped $1 is read as a reference to a global macro named 1, which is empty here,
* so the title would lose its "$1".
rdplot hh_income_median dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("Median household income (in \$1,000)", size(*0.9) span) name(hh_income_median, replace)
drop rdplot_*

rdplot home_value_median dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("Median home value (in \$1,000)", size(*0.9) span) name(home_value_median, replace)
drop rdplot_*

rdplot no_health_ins_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population without health insurance", size(*0.9) span) name(no_health_ins_pct, replace)
drop rdplot_*

rdplot unemploy_pct dist_to_border_signed if sample_selection == 1, `rd_opts'
twoway `layers', `axes' title("% of population being unemployed", size(*0.9) span) name(unemploy_pct, replace)
drop rdplot_*

* Assemble the named panels into one figure. The panels are named after the
* variables in $x, so the list is reused here rather than typed out again.
graph combine $x, ///
      cols(3) iscale(*0.8) xcommon ///
      title("All Time Zones in the Contiguous United States", size(*0.8) span)

* Save the figure next to the log; forward slashes keep the paths portable and
* avoid the backslash-before-macro escape.
graph save "code/analysis/figures/`pgm'", replace
graph export "code/analysis/figures/`pgm'.png", replace

* Enhanced Metafile export is only available in Stata for Windows.
if "`c(os)'" == "Windows" {
    graph export "code/analysis/figures/`pgm'.emf", replace
}
else {
    display as text "note: .emf export skipped (Windows-only format)"
}

log close
exit